import requests
import json

import time
from lxml import etree


class buyhouse():
    """Scraper for Lianjia (Ji'an) second-hand housing listings.

    Fetches listing index pages, extracts per-listing fields with XPath,
    and appends each record as one JSON line to ``lianjia.text``.
    """

    def __init__(self):
        # Desktop Chrome UA so the site serves the regular HTML pages.
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36"}
        self.start_url = "https://jian.lianjia.com/ershoufang/rs/"
        # Page template; pages 2..99 are fetched as pg2/, pg3/, ...
        self.temp_url = "https://jian.lianjia.com/ershoufang/pg{}/"

    def get_response(self, url):
        """GET *url* and return the raw response body as bytes.

        Raises requests.HTTPError on a non-2xx status instead of silently
        handing an error page to the parser.
        """
        res = requests.get(url, headers=self.headers)
        res.raise_for_status()
        return res.content

    def trans_res_html(self, res):
        """Parse raw HTML bytes into an lxml element tree."""
        return etree.HTML(res)

    @staticmethod
    def _first(node, xpath):
        """Return the first XPath match under *node*, or None when absent."""
        found = node.xpath(xpath)
        return found[0] if found else None

    def par_html_data(self, html):
        """Extract listing records from one result page and append them to lianjia.text."""
        li_list = html.xpath('//ul[@class="sellListContent"]/li')
        # Open the output file once per page (not once per listing) and let
        # the context manager guarantee it is closed even if a write fails.
        with open("lianjia.text", "a", encoding="utf-8") as f:
            for li in li_list:
                total = self._first(
                    li, './/div[@class="priceInfo"]//div[@class="totalPrice"]//text()')
                item = {
                    # Key spelling "regoin" kept as-is: it is part of the
                    # record format existing consumers may rely on.
                    "regoin": self._first(li, './/div[@class="positionInfo"]/a/text()'),
                    "houseIcon": self._first(li, './/div[@class="houseInfo"]/text()'),
                    # The original indexed [0] unconditionally and crashed with
                    # IndexError on a missing field; missing fields are now None.
                    "totalPrice": (total + "万") if total is not None else None,
                    "unitPrice": self._first(li, './/div[@class="unitPrice"]//text()'),
                }
                # One compact JSON object per line (JSON Lines): the previous
                # indent=2 objects appended back-to-back were unparseable.
                f.write(json.dumps(item, ensure_ascii=False) + "\n")
                print(item)

    def run(self):
        """Crawl listing pages 1 through 99 and persist every record."""
        res = self.get_response(self.start_url)
        html = self.trans_res_html(res)
        self.par_html_data(html)
        # Remaining pages come from the pg{n} URL template.
        for cur in range(2, 100):
            next_url = self.temp_url.format(cur)
            res = self.get_response(next_url)
            html = self.trans_res_html(res)
            self.par_html_data(html)
            time.sleep(0.5)  # throttle to be polite to the server



if __name__ == '__main__':
    # Script entry point: build the scraper and start crawling.
    scraper = buyhouse()
    scraper.run()
